cd "C:\Users\jedwab\Desktop\Replication Files for The Employment Profile of Cities around the World WD\Mapping"

* The do-file below creates two main data sets that are used to create the tables and figures.
* data_for_mapping.dta 
* temp_for_map

*************
*************
**# DATA #
*************
*************

**************************************
*** INFORMAL EMPLOYMENT FROM IPUMS ***
**************************************

* Data set created in the "IPUMS and other files" FOLDER
use "dataset_maps_and_cities_classwk_01.22.2022.dta", clear
* We drop India since the data comes from an employment survey that could over-represent the formal sector. Likewise for Italy. 
drop if country == "India"
drop if country == "Italy"
* From Federico
*India - Employment Survey (https://international.ipums.org/international-action/sample_details/country/in#tab_Surveys})
*Italy - Labor Force Survey (is it akin to an employment survey?) (https://international.ipums.org/international-action/sample_details/country/it#tab_Surveys})
codebook country country_year
* 72 countries and 189 country-years
* Not all censuses use all classes of workers. 
* We always know if that is the case or not and thus replace by . if the response is not available.
foreach X in selfemployed wage_worker unpaidworker other unknownmissing {
replace share_`X' = . if q_`X' == 0
sum share_`X'
}
* We verify that the sum of shares is equal to 1 or we drop the ones for which the sum is inconsistent *
egen test = rsum(share_selfemployed-share_unknownmissing)
sum test, d
* We drop if 0s
sum test if test <= 0.95, d
drop if test <= 0.95
drop test
* We drop the "unknownmissing" as we do not need it for the analysis.
drop q_* _* share_unknownmissing
* We then change the shares so that the sum is exactly equal to 1.
egen sum = rsum(share_selfemployed-share_other)
foreach X of varlist share_selfemployed-share_other {
replace `X' = `X'/sum*100
}
drop sum 
egen sum = rsum(share_selfemployed-share_other)
sum sum, d
* OK, 100 now.
drop sum
count
* 11678
* We select the closest observations to the year 2000.
gen dist2000 = abs(year-2000)
gsort +country +efua_id +dist2000 -year
order country efua_id dist2000 year 
save temp_informal, replace
* We verify it selects Indonesia 2005, not 1995, so the more recent one.
bysort country efua_id: keep if _n == 1
tab year
* We drop the years pre-1990.
tab country if year >= 1985 & year < 1990
drop if year < 1990
ren country_year countryyear
codebook country
codebook countryyear
sum year
* 70 countries in 79 country-years
keep efua_id countryyear share*
* We rename the shares *
ren share_selfemployed sh_self
ren share_wage_worker sh_wage
ren share_unpaidworker sh_unpaid
ren share_other sh_other
sort efua_id
save "dataset_maps_and_cities_selfempl", replace

***************************
*** INDUSTRY FROM IPUMS ***
***************************

* Data set created in the "IPUMS and other files" FOLDER
use "dataset_maps_and_cities_01.22.2022.dta", clear
gen countryyear = country+string(year)
* Not all censuses use all industries. 
* We know if that is the case or not and thus replace by . if the response is not available.
foreach X of varlist q_* {
tab `X', m
}
foreach X in agri mining mfg utilities construction trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv unknown other_industry serv_notsp resp_supressed {
sum share_`X'
replace share_`X' = 1 if share_`X' != . & share_`X' >= 1
}
foreach X in agri mining mfg utilities construction trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv unknown other_industry serv_notsp resp_supressed {
replace share_`X' = . if q_`X' == 0
}
foreach X in agri mining mfg utilities construction trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv unknown other_industry serv_notsp resp_supressed {
sum share_`X'
}
drop q_* _* 
* We verify if the sum of shares is equal to 1 or we drop if the data is too inconsistent *
egen test = rsum(share_agri-share_resp_supressed)
sum test, d
tab countryyear if test <= 0.95
drop if test <= 0.95
sum share_*
sum share_unknown, d
drop if share_unknown == 1
drop test 
* We modify the shares to remove the observations with missing information *
desc share*, f
drop share_resp_supressed share_unknown
desc share*, f
egen sum = rsum(share_agri-share_serv_notsp)
foreach X of varlist share_agri-share_serv_notsp {
replace `X' = `X'/sum*100
}
drop sum 
egen sum = rsum(share_agri-share_serv_notsp)
sum sum, d
drop sum
* We now create the correct shares for FIRE and MFG+FIRE
egen share_fire = rsum(share_fin_insu share_bussser)
egen share_mfgfire = rsum(share_fire share_mfg)
tab countryyear
codebook country countryyear
save "dataset_maps_and_cities_clean_allyrs", replace
* We now keep the observations closest to 2000.
gen dist2000 = abs(year-2000)
gsort +country +efua_id +dist2000 -year
order country efua_id dist2000 year 
* We verify it selects Indonesia 2005, not 1995, so the more recent one.
bysort country efua_id: keep if _n == 1
tab year
* We drop the years pre-1990.
tab country if year >= 1985 & year < 1990
drop if year < 1990
save "dataset_maps_and_cities_clean", replace

* We check the sample size 
use "dataset_maps_and_cities_clean", clear
codebook country
codebook countryyear
* 74 countries and 84 country-years 
keep efua_id
sort efua_id
save efua_id, replace
count
* 6865

* List of country-years for the data set with industry data 
use "dataset_maps_and_cities_clean", clear
keep country year countryyear
bysort countryyear: keep if _n == 1
sort country year
save list_industry_map, replace

* We combine the data sets 
use "dataset_maps_and_cities_clean", clear
sort efua_id
merge efua_id using "dataset_maps_and_cities_selfempl"
tab _m
drop if _m == 2
drop _m
save "dataset_maps_and_cities_clean", replace

* We label the variables *
label var country "Country"
label var countryyear "Country-Year"
label var dist2000 "Distance to the year 2000"
label var year "Year"
label var efua_name "FUA name"
order country countryyear year dist2000 x y capital efua_name *fua_pop_2015* p*adj* share_* sh_*  
label var share_fire "Share of people: FIRE"
label var share_mfgfire "Share of people: MFG+FIRE"
save "dataset_maps_and_cities_clean", replace

***********************************************
**# CREATION OF THE PRODUCTION CITY MEASURE #
***********************************************

use "dataset_maps_and_cities_clean", clear
** Creation of the population categories **
gen log_fua_pop = log_fua_pop_2015
gen fua_pop = fua_pop_2015
sum log_fua_pop_2015
sum fua_pop_2015
* goes from 10.82 (50K) to 17.2 (30 millions)
gsort- fua_pop_2015
sum log_fua_pop, d
egen max_log_fua_pop = max(log_fua_pop)
egen min_log_fua_pop = min(log_fua_pop)
gen diff_log_fua_pop = max_log_fua_pop - min_log_fua_pop
sum diff_log_fua_pop
replace diff_log_fua_pop = diff_log_fua_pop/10
sum diff_log_fua_pop
* 10 CATEGORIES * 
gen catsize = .
replace catsize = 1 if log_fua_pop >= min_log_fua_pop & log_fua_pop <= (min_log_fua_pop+diff_log_fua_pop)
replace catsize = 2 if log_fua_pop >= (min_log_fua_pop+diff_log_fua_pop) & log_fua_pop <= (min_log_fua_pop+2*diff_log_fua_pop)
foreach N of numlist 3(1)9 {
replace catsize = `N' if log_fua_pop >= (min_log_fua_pop+(`N'-1)*diff_log_fua_pop) & log_fua_pop <= (min_log_fua_pop+`N'*diff_log_fua_pop)
}
replace catsize = 10 if log_fua_pop >= (min_log_fua_pop+9*diff_log_fua_pop) & log_fua_pop <= max_log_fua_pop
bysort catsize: sum fua_pop
* We obtain their hypothetical mid-pop.
gen catsize_midpop = . 
replace catsize_midpop = (min_log_fua_pop + (min_log_fua_pop+diff_log_fua_pop))/2 if catsize == 1
replace catsize_midpop = ((min_log_fua_pop+diff_log_fua_pop) + (min_log_fua_pop+2*diff_log_fua_pop))/2 if catsize == 2
foreach N of numlist 3(1)9 {
replace catsize_midpop = ((min_log_fua_pop+(`N'-1)*diff_log_fua_pop) + (min_log_fua_pop+`N'*diff_log_fua_pop))/2 if catsize == `N'
}
replace catsize_midpop = ((min_log_fua_pop+9*diff_log_fua_pop) + max_log_fua_pop)/2 if catsize == 10
** How many FUAs per category ** 
tab catsize, m
bysort catsize: sum catsize_midpop
bysort catsize: sum fua_pop_2015
bysort catsize: sum log_fua_pop_2015
tab efua_name if catsize == 10
tab efua_name if catsize == 9
tab fua_pop if catsize == 9
gsort -fua_pop
order catsize efua_name fua_pop 
gen catsizeraw = catsize
* We combine the two top categories to have enough cities there.
replace catsize = 9 if catsize == 10
* 5 CATEGORIES *
tab catsize
gen catsize5 = 1 if catsize == 1 | catsize == 2
replace catsize5 = 2 if catsize == 3 | catsize == 4
replace catsize5 = 3 if catsize == 5 | catsize == 6
replace catsize5 = 4 if catsize == 7 | catsize == 8
replace catsize5 = 5 if catsize == 9 | catsize == 10
tab catsize catsize5
* We label the variables
label var fua_pop "FUA pop. 2015"
label var log_fua_pop "Log FUA pop. 2015"
drop max_log* min_log* diff_log_fua_pop
order efua_name efua_id country countryyear country_year year dist2000 x y capital *fua_pop_2015* p*adj* share_* sh_* catsize catsizeraw catsize5
label var catsize "Pop category (9)"
label var catsizeraw "Pop category (10)"
label var catsize_midpop "Mid-pop for each pop category (9)"
label var catsize5 "Pop category (5)"
save temp, replace

*********************************************************************************
*** CREATION OF THE WEIGHTS TO MAKE THE DATA MORE REPRESENTATIVE OF THE WORLD ***
*********************************************************************************

* List of cities with data * 
use temp, clear 
gen data_yn = 1
keep data_yn efua_id
sort efua_id
save data_yn, replace 

* Urbanization rate 2000
* Created in the folder "Stata"
use "urbshwup2000.dta", clear
gen cntry_name = country
replace cntry_name = "Brunei" if cntry_name == "BruneiDarussalam"
replace cntry_name = "BurkinaFaso" if cntry_name == "Burkina Faso"
replace cntry_name = "CapeVerde" if cntry_name == "CaboVerde"
replace cntry_name = "CostaRica" if cntry_name == "Costa Rica"
replace cntry_name = "CotedIvoire" if cntry_name == "Côted'Ivoire"
replace cntry_name = "Curacao" if cntry_name == "Curaçao"
replace cntry_name = "CzechRepublic" if cntry_name == "Czechia"
replace cntry_name = "DominicanRepublic" if cntry_name == "Dominican Republic"
replace cntry_name = "ElSalvador" if cntry_name == "El Salvador"
replace cntry_name = "GuineaBissau" if cntry_name == "Guinea-Bissau"
replace cntry_name = "HongKong" if cntry_name == "China,HongKongSAR"
replace cntry_name = "Kyrgyzstan" if cntry_name == "Kyrgyz Republic"
replace cntry_name = "Macao" if cntry_name == "China,MacaoSAR"
replace cntry_name = "Macedonia" if cntry_name == "TFYRMacedonia"
replace cntry_name = "Moldova" if cntry_name == "RepublicofMoldova"
replace cntry_name = "NorthKorea" if cntry_name == "Dem.People'sRepublicofKorea"
replace cntry_name = "Palestina" if cntry_name == "Palestine"
replace cntry_name = "PapuaNewGuinea" if cntry_name == "Papua New Guinea"
replace cntry_name = "PuertoRico" if cntry_name == "Puerto Rico"
replace cntry_name = "RepublicofCongo" if cntry_name == "Congo"
replace cntry_name = "Reunion" if cntry_name == "Réunion"
replace cntry_name = "SierraLeone" if cntry_name == "Sierra Leone"
replace cntry_name = "SouthAfrica" if cntry_name == "South Africa"
replace cntry_name = "SouthKorea" if cntry_name == "RepublicofKorea"
replace cntry_name = "SouthSudan" if cntry_name == "South Sudan"
replace cntry_name = "Syria" if cntry_name == "SyrianArabRepublic"
replace cntry_name = "Taiwan" if cntry_name == "China,TaiwanProvinceofChina"
replace cntry_name = "TimorLeste" if cntry_name == "Timor-Leste"
replace cntry_name = "TrinidadandTobago" if cntry_name == "Trinidad and Tobago"
replace cntry_name = "UnitedKingdom" if cntry_name == "United Kingdom"
replace cntry_name = "UnitedStates" if cntry_name == "United States"
sort cntry_name
save urbshwup2000_2, replace

* Population 2000
* Created in the folder "Stata"
use "popwup2000.dta", clear
gen cntry_name = country
replace cntry_name = "Brunei" if cntry_name == "BruneiDarussalam"
replace cntry_name = "BurkinaFaso" if cntry_name == "Burkina Faso"
replace cntry_name = "CapeVerde" if cntry_name == "CaboVerde"
replace cntry_name = "CostaRica" if cntry_name == "Costa Rica"
replace cntry_name = "CotedIvoire" if cntry_name == "Côted'Ivoire"
replace cntry_name = "Curacao" if cntry_name == "Curaçao"
replace cntry_name = "CzechRepublic" if cntry_name == "Czechia"
replace cntry_name = "DominicanRepublic" if cntry_name == "Dominican Republic"
replace cntry_name = "ElSalvador" if cntry_name == "El Salvador"
replace cntry_name = "GuineaBissau" if cntry_name == "Guinea-Bissau"
replace cntry_name = "HongKong" if cntry_name == "China,HongKongSAR"
replace cntry_name = "Kyrgyzstan" if cntry_name == "Kyrgyz Republic"
replace cntry_name = "Macao" if cntry_name == "China,MacaoSAR"
replace cntry_name = "Macedonia" if cntry_name == "NorthMacedonia"
replace cntry_name = "Moldova" if cntry_name == "RepublicofMoldova"
replace cntry_name = "NorthKorea" if cntry_name == "Dem.People'sRepublicofKorea"
replace cntry_name = "Palestina" if cntry_name == "Palestine"
replace cntry_name = "PapuaNewGuinea" if cntry_name == "Papua New Guinea"
replace cntry_name = "PuertoRico" if cntry_name == "Puerto Rico"
replace cntry_name = "RepublicofCongo" if cntry_name == "Congo"
replace cntry_name = "Reunion" if cntry_name == "Réunion"
replace cntry_name = "SierraLeone" if cntry_name == "Sierra Leone"
replace cntry_name = "SouthAfrica" if cntry_name == "South Africa"
replace cntry_name = "SouthKorea" if cntry_name == "RepublicofKorea"
replace cntry_name = "SouthSudan" if cntry_name == "South Sudan"
replace cntry_name = "Syria" if cntry_name == "SyrianArabRepublic"
replace cntry_name = "Taiwan" if cntry_name == "China,TaiwanProvinceofChina"
replace cntry_name = "TimorLeste" if cntry_name == "Timor-Leste"
replace cntry_name = "TrinidadandTobago" if cntry_name == "Trinidad and Tobago"
replace cntry_name = "UnitedKingdom" if cntry_name == "United Kingdom"
replace cntry_name = "UnitedStates" if cntry_name == "United States"
replace cntry_name = "FaeroeIslands" if cntry_name == "FaroeIslands"
replace cntry_name = "Swaziland" if cntry_name == "Eswatini"
sort cntry_name
save popwup2000_2, replace

* Per capita GDP
* Source: World Development Indicators of the World Bank  
* Last accessed: 01-14-2021
* Variable: GDP per capita, PPP (constant 2017 international $) * 
clear
import excel "Data_Extract_From_World_Development_Indicators (19).xlsx", sheet("Data") firstrow clear
drop if _n >= 218
* Data from 1990 to 2019 only
* We take the average around 2000
egen pcgdp2000_ma5 = rmean(y1995-y2005)
egen pcgdp2000_ma10 = rmean(y1990-y2010)
keep country_wb pcgdp2000_ma5 pcgdp2000_ma10
gen pcgdp2000 = pcgdp2000_ma5 
replace pcgdp2000 = pcgdp2000_ma10 if pcgdp2000 == .
keep country_wb pcgdp2000
gen cntry_name = country_wb
replace cntry_name = subinstr(cntry_name, " ", "",.) 
replace cntry_name = subinstr(cntry_name, "-", "",.) 
replace cntry_name = "Bahamas" if cntry_name == "Bahamas,The"
replace cntry_name = "Brunei" if cntry_name == "BruneiDarussalam"
replace cntry_name = "CapeVerde" if cntry_name == "CaboVerde"
replace cntry_name = "CotedIvoire" if cntry_name == "Coted'Ivoire"
replace cntry_name = "DemocraticRepublicoftheCongo" if cntry_name == "Congo,Dem.Rep."
replace cntry_name = "Egypt" if cntry_name == "Egypt,ArabRep."
replace cntry_name = "Gambia" if cntry_name == "Gambia,The"
replace cntry_name = "HongKong" if cntry_name == "HongKongSAR,China"
replace cntry_name = "Iran" if cntry_name == "Iran,IslamicRep."
replace cntry_name = "Kyrgyzstan" if cntry_name == "KyrgyzRepublic"
replace cntry_name = "Laos" if cntry_name == "LaoPDR"
replace cntry_name = "Macao" if cntry_name == "MacaoSAR,China"
replace cntry_name = "Macedonia" if cntry_name == "NorthMacedonia"
replace cntry_name = "NorthKorea" if cntry_name == "Korea,Dem.People’sRep."
replace cntry_name = "Palestina" if cntry_name == "WestBankandGaza"
replace cntry_name = "RepublicofCongo" if cntry_name == "Congo,Rep."
replace cntry_name = "Russia" if cntry_name == "RussianFederation"
replace cntry_name = "Slovakia" if cntry_name == "SlovakRepublic"
replace cntry_name = "SouthKorea" if cntry_name == "Korea,Rep."
replace cntry_name = "Swaziland" if cntry_name == "Eswatini"
replace cntry_name = "Syria" if cntry_name == "SyrianArabRepublic"
replace cntry_name = "Venezuela" if cntry_name == "Venezuela,RB"
replace cntry_name = "Yemen" if cntry_name == "Yemen,Rep."
sort cntry_name
save pcgdp2000, replace

* We merge with the full list of FUAs * 
* Created in the folder "Stata"
use fuas, clear
count
sort efua_id 
merge efua_id using data_yn
tab _m
drop _m
* We add the urbanization rate and population in 2000.
sort cntry_name
merge cntry_name using urbshwup2000_2
tab _m
tab cntry_name if _m == 1
drop if _m == 2
drop _m
sort cntry_name
merge cntry_name using popwup2000_2
tab _m
tab cntry_name if _m == 1
drop if _m == 2
drop _m
gen ccode = cntry_iso
replace fua_p_2015 = subinstr(fua_p_2015, ",", "",.) 
destring fua_p_2015, replace 
gen lpop = log(fua_p_2015)
replace data_yn = 0 if data_yn == .
collapse (max) data_yn urbshwup2000 popwup2000, by(cntry_iso cntry_name)
drop if urbshwup2000 == . | popwup2000 == .
gen upopwup2000 = popwup2000/100*urbshwup2000
count
sum urbshwup2000, d
sort cntry_name
merge cntry_name using pcgdp2000
tab _m
tab cntry_name if _m == 1
drop if _m == 1 & cntry_name != "Taiwan"
drop if _m == 2
drop _m
tab cntry_name if pcgdp == .
* according to IMF estimates or UN estimates in 2000 * 
* Source = https://en.wikipedia.org/wiki/List_of_countries_by_past_and_projected_GDP_(nominal)_per_capita#IMF_estimates_between_2000_and_2009
* IMF = Taiwan had same income as Spain
foreach X in Spain {
foreach Y in Taiwan {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* IMF = Venezuela = Croatia
foreach X in Croatia {
foreach Y in Venezuela {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* UN = Cuba = Dominican Republic
foreach X in DominicanRepublic {
foreach Y in Cuba {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* UN = Djibouti = Bhutan
foreach X in Indonesia {
foreach Y in Djibouti {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* UN = Eritrea = SierraLeone 
foreach X in SierraLeone {
foreach Y in Eritrea {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* UN = NorthKorea = Kenya
foreach X in Kenya {
foreach Y in NorthKorea {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* UN = Somalia = Uganda
foreach X in Uganda {
foreach Y in Somalia {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* IMF = Syria = Kazakhstan
foreach X in Kazakhstan {
foreach Y in Syria {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* IMF = Yemen = Uzbekistan
foreach X in Uzbekistan {
foreach Y in Yemen {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
* UN = South Sudan = Sudan
foreach X in Sudan {
foreach Y in SouthSudan {
gen pcgdp_temp = pcgdp2000 if cntry_name == "`X'"
egen maxpcgdp_temp = max(pcgdp_temp)
replace pcgdp2000 = maxpcgdp_temp if cntry_name == "`Y'"
sum pcgdp2000 if cntry_name == "`Y'"
drop *pcgdp_temp
}
}
tab cntry_name if pcgdp2000 == .
* We ignore these small countries/islands
drop if pcgdp2000 == .
count
* 178
gen lpcgdp2000 = log(pcgdp2000)
sum lpcgdp2000, d
xtile pcgdpcat = lpcgdp2000, nq(10)
tab pcgdpcat
save temp_k, replace
keep cntry_iso cntry_name country_wb lpcgdp2000
sort cntry_name
save lpcgdp2000, replace

use temp_k, clear
replace upopwup2000 = round(upopwup2000,1)
gen upopwup2000_data = upopwup2000 if data_yn
collapse (sum) upopwup2000 upopwup2000_data, by(pcgdpcat)
egen sumupop = sum(upopwup2000)
gen sh_world = upopwup2000/sumupop
egen sumupop_data = sum(upopwup2000_data)
gen sh_data = upopwup2000_data/sumupop_data
gen ratio = sh_world/sh_data
keep ratio pcgdpcat
sort pcgdpcat
save ratio, replace

use temp_k, clear
sort pcgdpcat
merge pcgdpcat using ratio
tab _m
drop _m
gen wt = upopwup2000*ratio
replace upopwup2000 = round(upopwup2000,1)
replace wt = round(wt,1)
keep cntry_name ratio country_wb urbshwup2000 popwup2000 upopwup2000
label var urbshwup2000 "Urbanization rate of the country 2000"
label var popwup2000  "Population of the country 2000"
label var upopwup2000 "Urban population of the country 2000"
label var ratio "Ratio used to modify the weights"
order cntry_name country_wb *2000 ratio
sort cntry_name
save ratio2, replace 

**********************************
*** URBAN DEFINITIONS CONTROLS ***
**********************************

* Source: Replication files from the following paper:
* Jedwab, Remi and Vollrath, Dietrich, (2015), Urbanization without growth in historical perspective, Explorations in Economic History, 58, issue C, p. 1-21, https://EconPapers.repec.org/RePEc:eee:exehis:v:58:y:2015:i:c:p:1-21.
use urban_definition, clear
gen cntry_name = country
replace cntry_name = "Bolivia" if cntry_name == "Bolivia (Plurinational State of)"
replace cntry_name = "DominicanRepublic" if cntry_name == "Dominican Republic"
replace cntry_name = "ElSalvador" if cntry_name == "El Salvador"
replace cntry_name = "Iran" if cntry_name == "Iran (Islamic Republic of)"
replace cntry_name = "PapuaNewGuinea" if cntry_name == "Papua New Guinea"
replace cntry_name = "SierraLeone" if cntry_name == "Sierra Leone"
replace cntry_name = "SouthAfrica" if cntry_name == "South Africa"
replace cntry_name = "Tanzania" if cntry_name == "United Republic of Tanzania"
replace cntry_name = "UnitedStates" if cntry_name == "United States of America"
replace cntry_name = "Venezuela" if cntry_name == "Venezuela (Bolivarian Republic of)"
replace cntry_name = "Vietnam" if cntry_name == "Viet Nam"
label var Admin "Urban definition: Admin function-based"
label var Threshold "Urban definition: Threshold-based"
label var Condition "Urban definition: Condition-based"
label var Admin_or_threshold "Urban definition: Admin or threshold based"
label var Threshold_level "Urban definition: Threshold level used"
drop Condition_comment
sort cntry_name
save urban_definition2, replace 

***********************************
*** ESTIMATION OF THE RESIDUALS ***
***********************************

* We create the residuals that measure production city-ness
* temp is created above 
use temp, clear
count
* 6865
* We complete the "capital city" dummy
replace capital = 1 if capital != 0
sum share_mfgfire, d
* We modify some country names to add the information needed to create the weights that make the sample more representative globally
gen cntry_name = country 
replace cntry_name = "CostaRica" if cntry_name == "Costa Rica"
replace cntry_name = "DominicanRepublic" if cntry_name == "Dominican Republic"
replace cntry_name = "ElSalvador" if cntry_name == "El Salvador"
replace cntry_name = "Kyrgyzstan" if cntry_name == "Kyrgyz Republic"
replace cntry_name = "PapuaNewGuinea" if cntry_name == "Papua New Guinea"
replace cntry_name = "SierraLeone" if cntry_name == "Sierra Leone"
replace cntry_name = "SouthAfrica" if cntry_name == "South Africa"
replace cntry_name = "UnitedStates" if cntry_name == "United States"
replace cntry_name = "Brazil" if cntry_name == "Brasil"
replace cntry_name = "UnitedStates" if cntry_name == "Usa"
replace cntry_name = "CostaRica" if cntry_name == "Costarica"
replace cntry_name = "DominicanRepublic" if cntry_name == "Dominicanrepublic"
replace cntry_name = "ElSalvador" if cntry_name == "Elsalvador"
replace cntry_name = "Kyrgyzstan" if cntry_name == "Kyrgyz"
replace cntry_name = "PapuaNewGuinea" if cntry_name == "Papuanewguinea"
replace cntry_name = "SierraLeone" if cntry_name == "Sierraleone"
replace cntry_name = "SouthAfrica" if cntry_name == "Southafrica"
label var cntry_name "Country name"
sort cntry_name
merge cntry_name using ratio2
tab _m
tab cntry_name if _m == 1
tab cntry_name if _m == 2
drop if _m == 2
drop _m
codebook ratio
tab country if ratio == .
gen wt = fua_pop*ratio
label var wt "Weight used to make the sample more globally representative"
codebook country
count
* 6865 FUAs in 74 countries
codebook urbshwup2000
*bysort country_wb: keep if _n == 1
*keep country_wb
*sort country_wb
*save list_mapping_74, replace

*** BASELINE = REGRESSION MFG+FIRE ***
*** TABLE D1 ***
**# TABLE D1 #1
* we export the coefficients to Table D1
* We obtain the residuals of this regression as well 
gen share_mfgfire_raw = share_mfgfire
label var share_mfgfire_raw "Share of MFG and FIRE (raw)"
foreach X in mfgfire {
xi: reg share_`X' i.catsize*urbshwup2000 i.capital [w=wt], robust clust(country)
outreg2 using "tables\tableD1.xls", replace dec(2)
outreg2 using table_world_residuals.xls, replace 
outreg2 using table_world_residuals_sideway.xls, replace sideway
regsave using coef_world, pval ci replace
predict share_`X'_r, resid
drop share_`X' 
ren share_`X'_r share_`X'
}
sum share_mfgfire, d
label var share_mfgfire "Residual for MFG and FIRE"
gen dist2word_mfgfire = share_mfgfire
label var dist2word_mfgfire "Residual for MFG and FIRE"

*** REGRESSION MFG AND FIRE SEPARATELY ***
foreach X in mfg fire {
gen share_`X'_raw = share_`X'
label var share_`X'_raw "Share of `X'' (raw)"
xi: reg share_`X' i.catsize*urbshwup2000 i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
drop share_`X' 
ren share_`X'_r share_`X'
sum share_`X', d
label var share_`X' "Residual for `X'"
}
* Correlation with baseline measures *
corr share_mfgfire share_mfg [w=fua_pop]
* 0.95
corr share_mfgfire share_fire [w=fua_pop]
* 0.33

************************
**# ROBUSTNESS CHECKS #
*** REGRESSION MFG+FIRE, ROBUSTNESS CHECKS, DIFFERENT SPECIFICATIONS ***

* SQUARE, CUBE AND FOURTH OF URBANIZATION TOO *
gen urbshwup2000_sq = urbshwup2000*urbshwup2000
gen urbshwup2000_cub = urbshwup2000*urbshwup2000*urbshwup2000
gen urbshwup2000_four = urbshwup2000*urbshwup2000*urbshwup2000*urbshwup2000
foreach X in mfgfire {
xi: reg share_`X'_raw i.catsize*urbshwup2000 i.catsize*urbshwup2000_sq i.catsize*urbshwup2000_cub i.catsize*urbshwup2000_four i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2word_`X'_urbsq
} 
sum share_mfgfire dist2word_mfgfire_urbsq
corr share_mfgfire dist2word_mfgfire_urbsq [w=fua_pop]
* 0.99
drop dist2word_mfgfire_urbsq
drop urbshwup2000_*
 
* LOG PER CAPITA GDP WITH SQUARE, CUBE, AND FOURTH * 
sort cntry_name
merge cntry_name using lpcgdp2000
tab _m
drop if _m == 2
drop _m
codebook lpcgdp2000
gen lpcgdp2000_sq = lpcgdp2000*lpcgdp2000
gen lpcgdp2000_cub = lpcgdp2000*lpcgdp2000*lpcgdp2000
gen lpcgdp2000_four = lpcgdp2000*lpcgdp2000*lpcgdp2000*lpcgdp2000
foreach X in mfgfire {
xi: reg share_`X'_raw i.catsize*lpcgdp2000 i.catsize*lpcgdp2000_sq i.catsize*lpcgdp2000_cub i.catsize*lpcgdp2000_four i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2word_`X'_lpcgdpsq
}
sum share_mfgfire dist2word_mfgfire_lpcgdpsq
corr share_mfgfire dist2word_mfgfire_lpcgdpsq  [w=fua_pop]
* 0.97
drop dist2word_mfgfire_lpcgdpsq
drop lpcgdp2000* 

* NOT USING URBANIZATION OR PER CAPITA GDP CONTROLS *
 foreach X in mfgfire {
xi: reg share_`X'_raw i.catsize i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2word_`X'_nourb
}
sum share_mfgfire dist2word_mfgfire_nourb
corr share_mfgfire dist2word_mfgfire_nourb  [w=fua_pop]
* 0.99
drop dist2word_mfgfire_nourb

* CORRELATION WITH THE NAIVE SHARES *
sum share_mfgfire share_mfgfire_raw
corr share_mfgfire share_mfgfire_raw [w=fua_pop]
* 0.89

* WITHOUT THE MODIFIED WEIGHTS *
 foreach X in mfgfire {
xi: reg share_`X'_raw i.catsize*urbshwup2000 i.capital [w=fua_pop], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2world_`X'_wtpop
}
sum share_mfgfire dist2world_mfgfire_wtpop
corr share_mfgfire dist2world_mfgfire_wtpop  [w=fua_pop]
* 1.00
drop dist2world_mfgfire_wtpop 

* WITHOUT WEIGHTS *
 foreach X in mfgfire {
xi: reg share_`X'_raw i.catsize*urbshwup2000 i.capital, robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2world_`X'_nowt
}
sum share_mfgfire dist2world_mfgfire_nowt
corr share_mfgfire dist2world_mfgfire_nowt 
corr share_mfgfire dist2world_mfgfire_nowt [w=fua_pop]
* 1.00
drop dist2world_mfgfire_nowt

** USING OTHER CATEGORIZATION **
* Raw (10 cats instead of 9 cats)
 foreach X in mfgfire {
xi: reg share_`X'_raw i.catsizeraw*urbshwup2000 i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2world_`X'_catraw
}
sum share_mfgfire dist2world_mfgfire_catraw
corr share_mfgfire dist2world_mfgfire_catraw  [w=fua_pop]
* 1.00
* Raw (5 cats instead of 4 cats)
 foreach X in mfgfire {
xi: reg share_`X'_raw i.catsize5*urbshwup2000 i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2world_`X'_cat5
}
sum share_mfgfire dist2world_mfgfire_cat5
corr share_mfgfire dist2world_mfgfire_cat5  [w=fua_pop]
* 0.99
drop dist2world_mfgfire_catraw 
drop dist2world_mfgfire_cat5

** CONTROLS FOR URBAN DEFINITION **
* We add information on the urban definition (from above)
sort cntry_name
merge cntry_name using urban_definition2
tab _m
drop if _m == 2
drop _m
gen type = "admin" if Admin == 1
replace type = "thre" if Threshold == 1
replace type = "cond" if Condition == 1
replace type = "adminthre" if Admin_or_thre == 1
tab type, m
ren Threshold_level threshold_level
tab threshold_level type, m 
gen threshold = (threshold_level != .)
tab threshold_level threshold, m
gen lthreshold_level = 0
replace lthreshold_level = log(threshold_level) if threshold_level != .
tab lthreshold_level threshold, m
tab lthreshold_level type, m
foreach X in mfgfire {
xi: reg share_`X'_raw i.catsize*urbshwup2000 i.capital i.type i.threshold|lthreshold_level [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2world_`X'_defi
}
sum share_mfgfire dist2world_mfgfire_defi
corr share_mfgfire dist2world_mfgfire_defi [w=wt]
* 0.91
drop dist2world_mfgfire_defi
drop *threshold* type Admin Threshold Condition Admin_or_thre

* USING UNT AS AN ALTERNATIVE SECTOR *
egen share_tradeotserv = rsum(share_trade share_other_serv)
sum share_tradeotserv
* 6865
egen share_tradehhserv = rsum(share_trade share_house_serv)
sum share_tradehhserv
* 6865
egen share_tradebothserv = rsum(share_trade share_other_serv share_house_serv)
sum share_tradebothserv
* 6865
egen share_tradebothserv2 = rsum(share_trade share_other_serv share_house_serv share_serv_notsp)
sum share_tradebothserv2
* 6865
foreach X in trade tradeotserv tradehhserv tradebothserv tradebothserv2 {
xi: reg share_`X' i.catsize*urbshwup2000 i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2world_`X'
label var dist2world_`X' "Residual estimated for sector `X'"
label var share_`X' "Share for sector `X'"
}
foreach X in tradebothserv2 {
desc dist2world_`X', f
sum share_mfgfire dist2world_`X'
corr share_mfgfire dist2world_`X' [w=fua_pop]
}
* -0.53

* USING UNT ALSO INCLUDING THE GOVT *
sum share_govmt
* 6865
egen share_tradegvt = rsum(share_tradebothserv share_govmt)
foreach X in tradegvt {
xi: reg share_`X' i.catsize*urbshwup2000 i.capital [w=wt], robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2word_`X'
}
foreach X in tradegvt {
desc dist2word_`X', f
sum share_mfgfire dist2word_`X'
corr share_mfgfire dist2word_`X' [w=fua_pop]
}
* -0.60
drop share_tradegvt dist2word_tradegvt

** SELF-EMPLOYMENT ** 
desc sh_*, f
count
* 6865
sum sh_*
* 5126 max 
gen maxself = (sh_wage != . | sh_self != . | sh_unpaid != . | sh_other != .)
tab maxself
gen share_wage = sh_wage if maxself == 1
gen share_self = sh_self if maxself == 1
egen share_selfunp = rsum(sh_self sh_unpaid) if maxself == 1 
sum share_selfunp 
egen share_selfunpot = rsum(sh_self sh_unpaid sh_other) if maxself == 1
sum share_selfunpot 
foreach X in wage self selfunp selfunpot {
xi: reg share_`X' i.catsize*urbshwup2000 i.capital [w=wt] if maxself == 1, robust clust(country)
predict share_`X'_r, resid
ren share_`X'_r dist2world_`X'
}
foreach X in wage self selfunp selfunpot {
desc dist2world_`X', f
sum share_mfgfire dist2world_`X'
corr share_mfgfire dist2world_`X' [w=fua_pop]
drop dist2world_`X'
drop share_`X'
}
* About -0.45
drop maxself
drop cntry_iso _Icatsize* _IcatXurbsh* _Icapital*
save temp2, replace
save data_for_mapping, replace

*** Adding urban share circa 2020 ***
 
* World Urbanization Prospects 2018 Revision
* Source: https://population.un.org/wup/
* Last accessed: 03-20-2021
clear
import excel "wup urban 2018.xlsx", sheet("Sheet1") firstrow clear
drop Countrycode
gen country = Regionsubreg
drop Regionsubreg
gen country_wup = country
replace country = subinstr(country, " ", "", .)
reshape long y, i(country) j(year)
ren y urbshwup
ren year year5
replace country = "Bolivia" if country == "Bolivia(PlurinationalStateof)"
replace country = "Burkina Faso" if country == "BurkinaFaso"
replace country = "Costa Rica" if country == "CostaRica"
replace country = "Dominican Republic" if country == "DominicanRepublic"
replace country = "El Salvador" if country == "ElSalvador"
replace country = "Iran" if country == "Iran(IslamicRepublicof)"
replace country = "Kyrgyz Republic" if country == "Kyrgyzstan"
replace country = "Laos" if country == "LaoPeople'sDemocraticRepublic"
replace country = "Palestine" if country == "StateofPalestine"
replace country = "Papua New Guinea" if country == "PapuaNewGuinea"
replace country = "Puerto Rico" if country == "PuertoRico"
replace country = "Russia" if country == "RussianFederation"
replace country = "Saint Lucia" if country == "SaintLucia"
replace country = "Sierra Leone" if country == "SierraLeone"
replace country = "South Africa" if country == "SouthAfrica"
replace country = "South Sudan" if country == "SouthSudan"
replace country = "Tanzania" if country == "UnitedRepublicofTanzania"
replace country = "Trinidad and Tobago" if country == "TrinidadandTobago"
replace country = "United Kingdom" if country == "UnitedKingdom"
replace country = "United States" if country == "UnitedStatesofAmerica"
replace country = "Venezuela" if country == "Venezuela(BolivarianRepublicof)"
replace country = "Vietnam" if country == "VietNam"
keep if year == 2020
drop year*
ren urbshwup urbshwup2020
sort country
save urbwup2020, replace 

use data_for_mapping, clear
replace country = "Brazil" if country == "Brasil"
replace country = "United States" if country == "Usa"
replace country = "Costa Rica" if country == "Costarica"
replace country = "Dominican Republic" if country == "Dominicanrepublic"
replace country = "El Salvador" if country == "Elsalvador"
replace country = "Kyrgyz Republic" if country == "Kyrgyz"
replace country = "Papua New Guinea" if country == "Papuanewguinea"
replace country = "Sierra Leone" if country == "Sierraleone"
replace country = "South Africa" if country == "Southafrica"
sort country
merge country using urbwup2020
tab _m
*tab country if _m == 1
*tab country if _m == 2
drop _m
label var urbshwup2020 "Urban share 2020"
label var country_wup "Country name (UN)"
save data_for_mapping, replace

*** Adding population circa 2020 ***
* 000s

* World Urbanization Prospects 2018 Revision
* Source: https://population.un.org/wup/
* Last accessed: 03-21-2021
clear
import excel "wup pop 2019.xlsx", sheet("Sheet1") firstrow clear
gen country = Regionsubreg
drop Regionsubreg
gen country_wup = country
replace country = subinstr(country, " ", "", .)
reshape long y, i(country) j(year)
ren y popwup
replace popwup = subinstr(popwup, " ", "", .)
destring popwup, replace
replace country = "Bolivia" if country == "Bolivia(PlurinationalStateof)"
replace country = "Burkina Faso" if country == "BurkinaFaso"
replace country = "Costa Rica" if country == "CostaRica"
replace country = "Dominican Republic" if country == "DominicanRepublic"
replace country = "El Salvador" if country == "ElSalvador"
replace country = "Iran" if country == "Iran(IslamicRepublicof)"
replace country = "Kyrgyz Republic" if country == "Kyrgyzstan"
replace country = "Laos" if country == "LaoPeople'sDemocraticRepublic"
replace country = "Palestine" if country == "StateofPalestine"
replace country = "Papua New Guinea" if country == "PapuaNewGuinea"
replace country = "Puerto Rico" if country == "PuertoRico"
replace country = "Russia" if country == "RussianFederation"
replace country = "Saint Lucia" if country == "SaintLucia"
replace country = "Sierra Leone" if country == "SierraLeone"
replace country = "South Africa" if country == "SouthAfrica"
replace country = "South Sudan" if country == "SouthSudan"
replace country = "Tanzania" if country == "UnitedRepublicofTanzania"
replace country = "Trinidad and Tobago" if country == "TrinidadandTobago"
replace country = "United Kingdom" if country == "UnitedKingdom"
replace country = "United States" if country == "UnitedStatesofAmerica"
replace country = "Venezuela" if country == "Venezuela(BolivarianRepublicof)"
replace country = "Vietnam" if country == "VietNam"
keep if year == 2020
drop year*
ren popwup popwup2020
sort country 
save popwup2020, replace 

use data_for_mapping, clear
replace country = "Brazil" if country == "Brasil"
replace country = "United States" if country == "Usa"
replace country = "Costa Rica" if country == "Costarica"
replace country = "Dominican Republic" if country == "Dominicanrepublic"
replace country = "El Salvador" if country == "Elsalvador"
replace country = "Kyrgyz Republic" if country == "Kyrgyz"
replace country = "Papua New Guinea" if country == "Papuanewguinea"
replace country = "Sierra Leone" if country == "Sierraleone"
replace country = "South Africa" if country == "Southafrica"
*replace country = "" if country == "CaribbeanNetherlands"
replace country = "FaroeIslands" if country == "FaeroeIslands"
replace country = "Eswatini" if country == "Swaziland"
replace country = "NorthMacedonia" if country == "TFYRMacedonia"
sort country
merge country using popwup2020
tab _m
tab country if _m == 1
tab country if _m == 2
drop _m
label var popwup2020 "Total population 2020"
label var country_wup "Country name (UN)"
save data_for_mapping, replace

****************************************
****************************************
**# THEIL DECOMPOSITION FOR THE WORLD #1
****************************************
****************************************

* Theil decomposition for MFG and FIRE *
use data_for_mapping, clear
egen countrynum = group(cntry_name)
sum dist2word_mfgfire, d
egen mindist2word_mfgfire = min(dist2word_mfgfire)
replace dist2word_mfgfire = dist2word_mfgfire + abs(mindist2word_mfgfire)
sum dist2word_mfgfire, d
theildeco dist2word_mfgfire [aw = fua_pop_2015], byg(countrynum)
* btw = 45%
* within = 55%

****************
****************
**# FIGURE 1 #2
****************
****************

* To create the figure, we need to know the mid-pop of each category. 
* We use the temporary data set that we had created above. 
use temp, clear
bysort catsize: sum catsize_midpop
gen expcatsize_midpop =exp(catsize_midpop)
bysort catsize: sum expcatsize_midpop
tab catsize
collapse (mean) fua_pop [w=fua_pop], by(catsize)
sort catsize
save temppop, replace
* These are the coefficients that we generated for Table D1 above. 
use coef_world, clear
gen var2 = substr(var,1,6)
tab var2
keep if var2 == "_Icats"
gen cat = substr(var,-1,1)
destring cat, replace
ren coef coef_base
keep cat coef_base
order cat
sort cat
save coef_temp, replace
* We now create the figure * 
use coef_world, clear
gen var2 = substr(var,1,6)
tab var2
keep if var2 == "_IcatX"
gen cat = substr(var,-1,1)
destring cat, replace
ren coef coef_interact
keep cat coef_interact
sort cat
merge cat using coef_temp
tab _m
drop _m
* * going from 10 to 90th percentile = 86.073-22.309 = 63.76
gen coef_20urb = coef_base + 20*coef_interact + .02912809*20
gen coef_85urb = coef_base + 85*coef_interact + .02912809*85
ren cat catsize
sort catsize
merge catsize using temppop
tab _m
drop _m
bysort catsize: sum fua_pop 
* we express in millions
replace fua_pop = fua_pop/1000000
replace fua_pop = round(fua_pop,0.1)
gen pop = string(fua_pop)
drop if catsize == 1
sort catsize
bysort catsize: sum fua_pop 

***** FIGURE 1 *****
twoway (connected coef_20urb catsize, mcolor(green) lcolor(green) lwidth(medthick) msymbol(triangle)) (connected coef_85urb catsize, mcolor(blue) lcolor(blue) lwidth(medthick) msymbol(triangle)), ytitle(FUA Empl. Share of Manufacturing + FIRE (%, 2000)) xtitle(FUA Population Size Category (Millions, 2000s)) xlabel(2(1)9) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) legend(order(1 "Countries with Urb. = 20% in 2000" 2 "Urb. = 85%") row(1) position(6))
* We manually modify the figure and then export it
graph use "reg_NEW.gph"
* old name 
graph export "Figures/fig_reg_urb20vs85.png", replace width(2620) height(1908)
* new name 
graph export "Figures/figure1.png", replace width(2620) height(1908)

*****************************************
*****************************************
*** CREATION OF THE DATA FOR THE MAPS ***
*****************************************
*****************************************

********************************
*** BOUNDARIES FOR THE WORLD ***
********************************

** These are country boundaries that we found on the website of Arcgis **
* Source: https://hub.arcgis.com/maps/UIA::uia-latitude-longitude-graticules-and-world-countries-boundaries/about
* Last accessed: 03-05-2021
*ssc install spmap
*ssc install shp2dta
*ssc install mif2dta
*shp2dta using "99bfd9e7-bb42-4728-87b5-07f8c8ac631c2020328-1-1vef4ev.lu5nk.shp", database(usdb) coordinates(uscoord) genid(id)
*use uscoord.dta, clear
*keep if _Y >= -57
*count 
*save boundariesold, replace
*count 
* The file is saved as "uscoord"

** Boundaries from the World Bank's official boundaries database **
* Source: https://datacatalog.worldbank.org/search/dataset/0038272/World-Bank-Official-Boundaries
* Last accessed: 10-03-2021
* This imports into Stata the shapefile obtained from the website 
*ssc install spmap
*ssc install shp2dta
*ssc install mif2dta
*shp2dta using "ne_50m_admin_0_countries_WB_GTA_Data2020501.shp", database(wbdb) coordinates(wbcoord) genid(id)
*count
* The file is saved as "wbcoord"

* We combine the two files and get thus boundaries from the two (which may overlap). 
use wbcoord, clear
count
keep if _Y >= -57
append using boundariesold
count
save boundaries, replace

*************************
*** DATA FOR THE MAPS ***
*************************

** Comparison of the shares **
use data_for_mapping, clear
* We change one city name 
replace efua_name = "Shajing" if efua_name == "Shajing Residential District"
sum share_mfgfire_raw [w=fua_pop], d
sum share_mfgfire [w=fua_pop], d
* First, we create the "distance" to the world average
foreach X in  mfg fire {
ren share_`X' dist2word_`X'
}
ren x lon
ren y lat
keep if lon != .
* We create four categories 
* There should not be 0s 

*** MFGFIRE AND MFG OR FIRE *** 
* Based on this distribution, we create five categories. 
sum dist2word_mfgfire [w=fua_pop], d
* Mean 0 
* -15 -10 -5 5 10 15
sum dist2word_mfgfire, d
* 5 15 15 ok
*foreach X in mfgfire {
*twoway (kdensity dist2word_`X' [w=fua_pop] if dist2word_`X' < 50), legend(order(1 "World") row(1)) ytitle(Kernel Dist. Residualized Urban Sh. `X' in FUA) xline(-15 -10 -5 0 5 10 15, lcolor(gs8) lpattern(dash) lwidth(medium))
*}
* We create the categories
foreach X in mfgfire mfg fire {
gen cat_`X' = ""
replace cat_`X' = "Prod-High" if dist2word_`X' > 15 
replace cat_`X' = "Prod-Mid" if dist2word_`X' > 10 & dist2word_`X' <= 15
replace cat_`X' = "Prod-Low" if dist2word_`X' > 5 & dist2word_`X' <= 10
replace cat_`X' = "Neutral" if dist2word_`X' > -5 & dist2word_`X' <= 5
replace cat_`X' = "Cons-Low" if dist2word_`X' > -10 & dist2word_`X' <= -5
replace cat_`X' = "Cons-Mid" if dist2word_`X' > -15 & dist2word_`X' <= -10
replace cat_`X' = "Cons-High" if dist2word_`X' <= -15
}
tab cat_mfgfire, m
bysort cat_mfgfir: sum dist2word_mfgfire
* We create the supercategory for FIRE and MFG
tab cat_mfg cat_fire
gen cat_mfgORfire = ""
replace cat_mfgORfire = "MFG-High" if cat_mfgORfire == "" & cat_mfg == "Prod-High"
replace cat_mfgORfire = "FIRE-High" if cat_mfgORfire == "" & cat_fire == "Prod-High"
replace cat_mfgORfire = "MFG-Mid" if cat_mfgORfire == "" & cat_mfg == "Prod-Mid"
replace cat_mfgORfire = "FIRE-Mid" if cat_mfgORfire == "" & cat_fire == "Prod-Mid"
* There are only 17 cases with both MFG and FIRE
* MFG has many more "low" cities. In that case, we give them to FIRE. 
replace cat_mfgORfire = "FIRE-Low" if cat_mfgORfire == "" & cat_fire == "Prod-Low"
replace cat_mfgORfire = "MFG-Low" if cat_mfgORfire == "" & cat_mfg == "Prod-Low"
tab cat_mfgORfire, m
label var cat_mfgfire "Category: MFGFIRE"
label var cat_mfg "Category: MFG"
label var cat_fire "Category: FIRE"
label var cat_mfgORfire "Category: MFG or FIRE"
* We add and trim the map of boundaries. 
append using boundaries
drop if _X != . & _X < -130
drop if _X != . & _X > 150
drop if _Y != . & _Y > 70
* We drop Hawai 
drop if lon != . & lon < -125
* We drop some variables we don't need for this
drop p*adj* share_* sh_* *2000 
save temp_for_map, replace

*****************
*****************
**# TABLE 1 #12
*****************
*****************

*** PANEL A ***
* POPULATION-WEIGHTED PCC COUNTRY INDEX *
use temp_for_map, clear
drop if lon == .
count
collapse (sum) fua_pop_2015, by(country_wb)
sort country_wb
save tempfuapop, replace
use temp_for_map, clear
drop if lon == .
count
* 6863
* Baseline measure = dist2word_mfgfire
ren dist2word_mfgfire pcc
collapse (mean) pcc [w=fua_pop_2015], by(country_wb popwup2020)
gsort- popwup2020
gen large25 = (_n <= 18)
* the 25th most populated country is South Africa
gsort- pcc
order country_wb pcc large25
sort country_wb
save templarge25, replace
count
* 74
gsort- pcc
export excel using "tables\table1A.xls", firstrow(variables) replace

** PANEL B **
* AVERAGE BY UN REGION *
* This is the list of regions 
sort country_wb
merge country_wb using list_unregions
tab _m
drop if _m == 2
drop _m
count
* This file is created above
sort country_wb
merge country_wb using tempfuapop
tab _m
drop if _m == 2
drop _m
count
* This is a list of 116 developing economies 
sort country_wb
merge country_wb using list116G
tab _m
tab country_wb if _m == 1
tab country_wb if _m == 2
gen G = (_m == 3)
drop if _m == 2
drop _m
count
* 74
gen pcc_G = pcc if G == 1
gen region = "LAC" if (unsubregion == "Caribbean" | unsubregion == "Central America" | unsubregion == "South America")
replace region = "Asia" if (unregion == "Asia" & (unsubregion != "Western Asia"))
replace region = "SSA" if (unregion == "Africa" & (unsubregion != "North Africa"))
replace region = "MENA" if (unsubregion == "Northern Africa" | unsubregion == "Western Asia")
replace region = "Europe" if (unregion == "Europe")
replace region = "NorthAm" if (unregion == "North America" & region == "")
replace region = "Oceania" if (unregion == "Oceania")
tab unsubregion region, m
tab country_wb if region == ""
save temp, replace 
collapse (mean) pcc pcc_G [w=fua_pop_2015], by(unregion unsubregion)
gsort- pcc_G
export excel using "tables\table1B2_subregion.xls", firstrow(variables) replace
use temp, clear
collapse (mean) pcc pcc_G [w=fua_pop_2015], by(region)
export excel using "tables\table1B1_subregion.xls", firstrow(variables) replace
gsort- pcc

*****************
*****************
**# TABLE 3 #3
*****************
*****************

* GINI INDEX * 
use temp_for_map, clear
drop if lon == .
count
* 6863
* Baseline measure = dist2word_mfgfire
ren dist2word_mfgfire pcc
drop if pcc == .
sum pcc, d
* We shift the data to avoid 0s
egen minpcc = min(pcc)
replace pcc = pcc - minpcc
drop minpcc
sum pcc, d
* We shift again to make sure the smallest obs is above 0
replace pcc = pcc+2.024981
sum pcc, d
keep pcc country_wb fua_pop_2015
egen countrynum = group(country_wb)
replace fua_pop_2015 = fua_pop_2015*1000
sum countrynum
save temp, replace
* We calculate the Gini for each country
foreach X of numlist 1(1)74 {
use temp, clear
keep if countrynum==`X'
gen gini = .
ineqdeco pcc [fw=fua_pop_2015]
replace gini = $S_gini
keep country_wb countrynum gini
bysort country_wb: keep if _n == 1
save temp`X', replace
}
* We combine them,
use temp1, clear
foreach X of numlist 2(1)74 {
append using temp`X'
}
gsort- gini
sort country_wb
merge country_wb using templarge25
drop if _m == 2
tab _m
drop _m
gsort- gini
count
replace gini = round(gini,0.01)
export excel using "tables\table3.xls", firstrow(variables) replace
* We save the Gini for the econometric analysis in the folder Stata
keep country_wb gini
sort country_wb
save list_ginis, replace

****************
****************
**# FIGURE 2 #6
****************
****************

* We create this figure in Stata to see what it looks like
* We then gave the data to the mapping department of the World Bank which then created nicer versions of it
* In the folder "Figures created by the map department of the World Bank"
** FIGURE 2 **
** WORLD **
use temp_for_map, clear
codebook country
keep if (lon == . & _X < 140) | (lon != . & lon <= 140)
keep if (lat == . & _Y < 60) | (lat != . & lat <= 60)
foreach X in mfgfire {
twoway (scatter _Y _X, mcolor(gs10) msize(tiny) msymbol(circle) mlcolor(gs10) mlwidth(medthick)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Neutral", mcolor(gs7) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Low", mcolor(cyan) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Low", mcolor(gold) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Mid", mcolor(midblue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Mid", mcolor(orange) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-High", mcolor(blue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-High", mcolor(red) msize(tiny) msymbol(circle_hollow)) ///
, legend(order(8 "Cons-High" 6 "Cons-Mid" 4 "Cons-Low" 3 "Prod-Low" 5 "Prod-Mid" 7 "Prod-High") row(1) position(6)) xtitle(,) ytitle(.) ytitle(, size(zero)) xtitle(.) xtitle(, size(zero)) ///
xlabel(-125(25)125) ylabel(-50(10)60) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) 
graph export "Figures\fig_world.png", replace width(2620) height(1908)
graph export "Figures\figure2.png", replace width(2620) height(1908)
}

****************
****************
**# FIGURE D1 #7
****************
****************

* We create this figure in Stata to see what it looks like
* We then gave the data to the mapping department of the World Bank which then created nicer versions of it
* In the folder "Figures created by the map department of the World Bank"
** FIGURE D1 **
** ASIA **
use temp_for_map, clear
keep if (lon == . & _X < 135) | (lon != . & lon <= 135)
keep if (lon == . & _X > 40) | (lon != . & lon > 40)
keep if (lat == . & _Y > -10) | (lat != . & lat > -10)
keep if (lat == . & _Y < 50) | (lat != . & lat < 50)
foreach X in mfgfire {
twoway (scatter _Y _X, mcolor(gs10) msize(tiny) msymbol(circle) mlcolor(gs10) mlwidth(medthick)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Neutral", mcolor(gs7) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Low", mcolor(cyan) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Low", mcolor(gold) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Mid", mcolor(midblue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Mid", mcolor(orange) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-High", mcolor(blue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-High", mcolor(red) msize(tiny) msymbol(circle_hollow)) ///
, legend(order(8 "Cons-High" 6 "Cons-Mid" 4 "Cons-Low" 3 "Prod-Low" 5 "Prod-Mid" 7 "Prod-High") row(1) position(6)) xtitle(,) ytitle(.) ytitle(, size(zero)) xtitle(.) xtitle(, size(zero)) ///
xlabel(40(10)130) ylabel(-10(10)50) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
graph export "figures\fig_ASIA.png", replace width(2620) height(1908)
graph export "figures\figureD1.png", replace width(2620) height(1908)
}

****************
****************
**# FIGURE D2 #8
****************
****************

* We create this figure in Stata to see what it looks like
* We then gave the data to the mapping department of the World Bank which then created nicer versions of it
* In the folder "Figures created by the map department of the World Bank"
** FIGURE D2 **
** AFRICA **
use temp_for_map, clear
keep if (lon == . & _X < 45) | (lon != . & lon <= 45)
keep if (lon == . & _X > -17) | (lon != . & lon > -17.5)
keep if (lat == . & _Y < 35) | (lat != . & lat <= 35)
keep if (lat == . & _Y >= -35) | (lat != . & lat >= -35)
foreach X in mfgfire {
twoway (scatter _Y _X, mcolor(gs10) msize(tiny) msymbol(circle) mlcolor(gs10) mlwidth(medthick)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Neutral", mcolor(gs7) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Low", mcolor(cyan) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Low", mcolor(gold) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Mid", mcolor(midblue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Mid", mcolor(orange) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-High", mcolor(blue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-High", mcolor(red) msize(tiny) msymbol(circle_hollow)) ///
, legend(order(8 "Cons-High" 6 "Cons-Mid" 4 "Cons-Low" 3 "Prod-Low" 5 "Prod-Mid" 7 "Prod-High") row(1) position(6)) xtitle(,) ytitle(.) ytitle(, size(zero)) xtitle(.) xtitle(, size(zero)) ///
xlabel(-15(10)45) ylabel(-35(10)35) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) 
graph export "Figures\fig_AFRICA.png", replace width(2620) height(1908)
graph export "Figures\figureD2.png", replace width(2620) height(1908)
}

****************
****************
**# FIGURE D3 #9
****************
****************

* We create this figure in Stata to see what it looks like
* We then gave the data to the mapping department of the World Bank which then created nicer versions of it
* In the folder "Figures created by the map department of the World Bank"
** FIGURE D3 **
** EUROPE **
use temp_for_map, clear
keep if (lon == . & _X > -15) | (lon != . & lon > -15)
keep if (lon == . & _X < 45) | (lon != . & lon < 45)
keep if (lat == . & _Y > 35) | (lat != . & lat > 35)
keep if (lat == . & _Y < 57) | (lat != . & lat < 57)
foreach X in mfgfire {
twoway (scatter _Y _X, mcolor(gs10) msize(tiny) msymbol(circle) mlcolor(gs10) mlwidth(medthick)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Neutral", mcolor(gs7) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Low", mcolor(cyan) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Low", mcolor(gold) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Mid", mcolor(midblue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Mid", mcolor(orange) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-High", mcolor(blue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-High", mcolor(red) msize(tiny) msymbol(circle_hollow)) ///
, legend(order(8 "Cons-High" 6 "Cons-Mid" 4 "Cons-Low" 3 "Prod-Low" 5 "Prod-Mid" 7 "Prod-High") row(1) position(6)) xtitle(,) ytitle(.) ytitle(, size(zero)) xtitle(.) xtitle(, size(zero)) ///
xlabel(-10(10)30) ylabel(35(5)55) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
graph export "Figures\fig_EURO.png", replace width(2620) height(1908)
graph export "Figures\figureD3.png", replace width(2620) height(1908)
}

*****************
*****************
**# FIGURE D4 #10
*****************
*****************

* We create this figure in Stata to see what it looks like
* We then gave the data to the mapping department of the World Bank which then created nicer versions of it
* In the folder "Figures created by the map department of the World Bank"
** FIGURE D4 **
** NORTH AM **
use temp_for_map, clear
keep if (lon == . & _X < -33) | (lon != . & lon <= -33)
keep if (lon == . & _X > -126) | (lon != . & lon > -126)
keep if (lat == . & _Y > 10) | (lat != . & lat > 10)
keep if (lat == . & _Y < 55) | (lat != . & lat < 55)
keep if (lon == . & _X < -60) | (lon != . & lon < -60)
foreach X in mfgfire {
twoway (scatter _Y _X, mcolor(gs10) msize(tiny) msymbol(circle) mlcolor(gs10) mlwidth(medthick)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Neutral", mcolor(gs7) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Low", mcolor(cyan) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Low", mcolor(gold) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Mid", mcolor(midblue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Mid", mcolor(orange) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-High", mcolor(blue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-High", mcolor(red) msize(tiny) msymbol(circle_hollow)) ///
, legend(order(8 "Cons-High" 6 "Cons-Mid" 4 "Cons-Low" 3 "Prod-Low" 5 "Prod-Mid" 7 "Prod-High") row(1) position(6)) xtitle(,) ytitle(.) ytitle(, size(zero)) xtitle(.) xtitle(, size(zero)) ///
xlabel(-120(10)-60) ylabel(10(10)50) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
graph export "Figures\fig_NAM.png", replace width(2620) height(1908)
graph export "Figures\figureD4.png", replace width(2620) height(1908)
}

*****************
*****************
**# FIGURE D5 #11
*****************
*****************

* We create this figure in Stata to see what it looks like
* We then gave the data to the mapping department of the World Bank which then created nicer versions of it
* In the folder "Figures created by the map department of the World Bank"
** FIGURE D5 **
** SOUTH AM **
use temp_for_map, clear
keep if (lon == . & _X < -33) | (lon != . & lon <= -33)
keep if (lon == . & _X > -126) | (lon != . & lon > -126)
keep if (lat == . & _Y < 12) | (lat != . & lat <= 12)
keep if (lon == . & _X > -82) | (lon != . & lon > -82)
foreach X in mfgfire {
twoway (scatter _Y _X, mcolor(gs10) msize(tiny) msymbol(circle) mlcolor(gs10) mlwidth(medthick)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Neutral", mcolor(gs7) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Low", mcolor(cyan) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Low", mcolor(gold) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-Mid", mcolor(midblue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-Mid", mcolor(orange) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Prod-High", mcolor(blue) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "Cons-High", mcolor(red) msize(tiny) msymbol(circle_hollow)) ///
, legend(order(8 "Cons-High" 6 "Cons-Mid" 4 "Cons-Low" 3 "Prod-Low" 5 "Prod-Mid" 7 "Prod-High") row(1) position(6)) xtitle(,) ytitle(.) ytitle(, size(zero)) xtitle(.) xtitle(, size(zero)) ///
xlabel(-80(10)-40) ylabel(-40(10)10) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
graph export "Figures\fig_SAM.png", replace width(2620) height(1908)
graph export "Figures\figureD5.png", replace width(2620) height(1908)
}

***************
***************
**# TABLE 2 #4
***************
***************

*** LARGEST CITIES IN THE WORLD ***
* Keep if above 10 million
use temp_for_map, clear
gsort -fua_pop_2015
gen rank = _n
order rank efua_name cat_mfgfire dist2word_mfgfire fua_pop_2015 country
keep rank efua_name country fua_pop_2015 cat_mfgfire dist2word_mfgfire
ren efua_name Name
ren country Country 
replace fua_pop_2015 = fua_pop_2015 /1000000
keep if fua_pop_2015 >= 10
ren fua_pop_2015 Pop2000sMillion
ren cat_mfgfire Category
ren dist2word_mfgfire Residual
replace Residual = round(Residual,0.1)
ren Residual ResidualPct
replace Pop2000sMillion = round(Pop2000sMillion,0.1)
ren rank Rank
replace Country = "USA" if Country == "Usa"
replace Country = "Brazil" if Country == "Brasil"
replace Name = "Bangalore" if Name == "Bengaluru"
replace Name = "Delhi" if Name == "Delhi [New Delhi]"
replace Name = "Ho Chi Minh" if Name == "Ho Chi Minh City"
replace Name = "Manila" if Name == "Quezon City [Manila]"
replace Name = "Sao Paulo" if Name == "SÃ£o Paulo"
keep if Name != ""
export excel using "tables\table2.xls", firstrow(variables) replace

****************
****************
**# FIGURE 3 #5
****************
****************

* We create this figure in Stata to see what it looks like
* We then gave the data to the mapping department of the World Bank which then created nicer versions of it
* In the folder "Figures created by the map department of the World Bank"
*** FIGURE 3 IN STATA
*** VERSION OF THE FIGURE BEFORE THE WORLD BANK'S LAB CREATED THE FIGURE FOR THE DRAFT 
** WORLD **
* Weighted by pop and making the blue appear more
use temp_for_map, clear
tab cat_mfgORfire, m
keep if (lon == . & _X < 140) | (lon != . & lon <= 140)
keep if (lat == . & _Y < 60) | (lat != . & lat <= 60)
foreach X in mfgORfire {
twoway (scatter _Y _X, mcolor(gs10) msize(tiny) msymbol(circle) mlcolor(gs10) mlwidth(medthick)) ///
(scatter lat lon if cat_`X' == "", mcolor(gs7) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "MFG-Low", mcolor(lavender) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "FIRE-Low", mcolor(mint) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "MFG-Mid", mcolor(magenta) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "FIRE-Mid", mcolor(midgreen) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "MFG-High", mcolor(purple) msize(tiny) msymbol(circle_hollow)) ///
(scatter lat lon [w=fua_pop] if cat_`X' == "FIRE-High", mcolor(dkgreen) msize(tiny) msymbol(circle_hollow)) ///
, legend(order(3 "MFG-Low" 4 "FIRE-Low" 5 "MFG-Mid" 6 "FIRE-Mid" 7 "MFG-High" 8 "FIRE-High") row(1) position(6)) xtitle(,) ytitle(.) ytitle(, size(zero)) xtitle(.) xtitle(, size(zero)) ///
xlabel(-125(25)125) ylabel(-50(10)60) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) 
graph export "Figures\fig_world_mfgORfire.png", replace width(2620) height(1908)
graph export "Figures\figure3.png", replace width(2620) height(1908)
}

*****************
*****************
**# FIGURE 4 #3
*****************
*****************

* Population over time
* Source listed above
clear
import excel "wup pop 2019.xlsx", sheet("Sheet1") firstrow clear
gen country = Regionsubreg
drop Regionsubreg
gen country_wup = country
replace country = subinstr(country, " ", "", .)
reshape long y, i(country) j(year)
ren y popwup
replace popwup = subinstr(popwup, " ", "", .)
destring popwup, replace
replace country = "Bolivia" if country == "Bolivia(PlurinationalStateof)"
replace country = "Burkina Faso" if country == "BurkinaFaso"
replace country = "Costa Rica" if country == "CostaRica"
replace country = "Dominican Republic" if country == "DominicanRepublic"
replace country = "El Salvador" if country == "ElSalvador"
replace country = "Iran" if country == "Iran(IslamicRepublicof)"
replace country = "Kyrgyz Republic" if country == "Kyrgyzstan"
replace country = "Laos" if country == "LaoPeople'sDemocraticRepublic"
replace country = "Palestine" if country == "StateofPalestine"
replace country = "Papua New Guinea" if country == "PapuaNewGuinea"
replace country = "Puerto Rico" if country == "PuertoRico"
replace country = "Russia" if country == "RussianFederation"
replace country = "Saint Lucia" if country == "SaintLucia"
replace country = "Sierra Leone" if country == "SierraLeone"
replace country = "South Africa" if country == "SouthAfrica"
replace country = "South Sudan" if country == "SouthSudan"
replace country = "Tanzania" if country == "UnitedRepublicofTanzania"
replace country = "Trinidad and Tobago" if country == "TrinidadandTobago"
replace country = "United Kingdom" if country == "UnitedKingdom"
replace country = "United States" if country == "UnitedStatesofAmerica"
replace country = "Venezuela" if country == "Venezuela(BolivarianRepublicof)"
replace country = "Vietnam" if country == "VietNam"
sort country year
save popwup, replace 

* Urban share over time 
* Source listed above
clear
import excel "wup urban 2018.xlsx", sheet("Sheet1") firstrow clear
drop Countrycode
gen country = Regionsubreg
drop Regionsubreg
gen country_wup = country
replace country = subinstr(country, " ", "", .)
reshape long y, i(country) j(year)
ren y urbshwup
ren year year5
replace country = "Bolivia" if country == "Bolivia(PlurinationalStateof)"
replace country = "Burkina Faso" if country == "BurkinaFaso"
replace country = "Costa Rica" if country == "CostaRica"
replace country = "Dominican Republic" if country == "DominicanRepublic"
replace country = "El Salvador" if country == "ElSalvador"
replace country = "Iran" if country == "Iran(IslamicRepublicof)"
replace country = "Kyrgyz Republic" if country == "Kyrgyzstan"
replace country = "Laos" if country == "LaoPeople'sDemocraticRepublic"
replace country = "Palestine" if country == "StateofPalestine"
replace country = "Papua New Guinea" if country == "PapuaNewGuinea"
replace country = "Puerto Rico" if country == "PuertoRico"
replace country = "Russia" if country == "RussianFederation"
replace country = "Saint Lucia" if country == "SaintLucia"
replace country = "Sierra Leone" if country == "SierraLeone"
replace country = "South Africa" if country == "SouthAfrica"
replace country = "South Sudan" if country == "SouthSudan"
replace country = "Tanzania" if country == "UnitedRepublicofTanzania"
replace country = "Trinidad and Tobago" if country == "TrinidadandTobago"
replace country = "United Kingdom" if country == "UnitedKingdom"
replace country = "United States" if country == "UnitedStatesofAmerica"
replace country = "Venezuela" if country == "Venezuela(BolivarianRepublicof)"
replace country = "Vietnam" if country == "VietNam"
sort country year5
save urbwup, replace 

* Data set created above
use "dataset_maps_and_cities_clean_allyrs", clear
gen country_ipums = country
replace country_ipums = "Dominican Republic" if country_ipums == "Dominicanrepublic"
replace country_ipums = "El Salvador" if country_ipums == "Elsalvador"
replace country_ipums = "Papua New Guinea" if country_ipums == "Papuanewguinea"
replace country_ipums = "Sierra Leone" if country_ipums == "Sierraleone"
replace country_ipums = "South Sudan" if country_ipums == "Southsudan"
replace country_ipums = "United States" if country_ipums == "Usa"
replace country_ipums = "Brazil" if country_ipums == "Brasil"
tab year country
* We keep the 8 countries with enough data over time
replace country = "Brazil" if country == "Brasil"
keep if country == "Brazil" | country == "Mexico" | country == "Colombia" | country == "Argentina" | country == "Venezuela" | country == "Chile" | country == "Guatemala" | country == "Ecuador" | country == "Bolivia" | country == "Paraguay" | country == "Peru" | country == "Panama" 
gen count = 1
bysort country efua_id: egen sumcount = sum(count)
bysort country: egen maxsumcount = max(sumcount)
tab maxsumcount
* We create past FUA pop *
* For this one, we use the weighted avg of UCs in the FUA
gen ratio0015 = p00_adj_wgt/p15_adj_wgt
gen ratio9015 = p90_adj_wgt/p15_adj_wgt
gen ratio7515 = p75_adj_wgt/p15_adj_wgt
sum ratio*, d
gen fua_pop_2000 = fua_pop_2015*ratio0015 
gen fua_pop_1990 = fua_pop_2015*ratio9015 
gen fua_pop_1975 = fua_pop_2015*ratio7515 
*sum fua_pop_* if efua_name == "Buenos Aires"
foreach X in 1975 1990 2000 2015 {
gen lfua_pop_`X' = log(fua_pop_`X')
}
drop log_fua_pop
gen log_fua_pop = lfua_pop_1975 if year >= 1960 & year <= 1982
replace log_fua_pop = lfua_pop_1990 if year >= 1983 & year <= 1995
replace log_fua_pop = lfua_pop_2000 if year >= 1996 & year <= 2007
replace log_fua_pop = lfua_pop_2015 if year >= 2008 & year <= 2015
codebook log_fua_pop
gen fua_pop = exp(log_fua_pop)
sum fua_pop, d
drop if fua_pop == . | fua_pop < 50000
count
save evol, replace

** WE NOW CREATE THE FIGURE **

use evol, clear
keep if country == "Brazil" | country == "Mexico" | country == "Colombia" | country == "Argentina" | country == "Venezuela" | country == "Chile" | country == "Guatemala" | country == "Ecuador" | country == "Bolivia" | country == "Paraguay" | country == "Peru" | country == "Panama" 
tab country
* We drop some observations to have a balanced sample
drop if country == "Mexico" & year == 1960
drop if country == "Guatemala" & year < 1980
drop if country == "Paraguay" & year < 1980
drop if country == "Panama" & year < 1980
drop if country == "Mexico" & year == 1995
drop if country == "Brazil" & year == 2000
drop if country == "Mexico" & year == 2000
drop if country == "Ecuador" & year == 2000
drop if country == "Bolivia" & year == 2001
drop if country == "Panama" & year == 2000
drop if country == "Mexico" & year == 2015
* We create the decade variable
gen decade = .
replace decade = 1980 if year >= 1980 & year <= 1989
replace decade = 1990 if year >= 1990 & year <= 1999
replace decade = 2000 if year >= 2000 
tab year if decade == ., m
replace decade = 1980 if year >= 1960 & year <= 1980
tab country decade
ren year rawyear
ren decade year
* We add total pop
sort country year
merge country year using popwup
tab _merge
drop if _merge == 2
drop _merge
* We add the urban share
gen year5 = year
sort country year5
merge country year5 using urbwup
tab _merge
drop if _merge == 2
drop _merge
drop year5
codebook popwup urbshwup
sum urbshwup, d
gen upop = popwup/100*urbshwup
* We drop a few outlying observations to make the figure more legible.
drop if share_mfgfire >= 60
* We create the figure 
foreach X in mfgfire {
twoway (scatter share_`X' log_fua_pop [w = fua_pop] if year == 1980, mcolor(cyan) msize(small) msymbol(circle_hollow)) ///
(scatter share_`X' log_fua_pop [w = fua_pop] if year == 1990, mcolor(midblue) msize(small) msymbol(circle_hollow)) ///
(scatter share_`X' log_fua_pop [w = fua_pop] if year == 2000, mcolor(blue) msize(small) msymbol(circle_hollow)) ///
(qfit share_`X' log_fua_pop [w = fua_pop] if year == 1980, lcolor(cyan) lwidth(thick)) ///
(qfit share_`X' log_fua_pop [w = fua_pop] if year == 1990, lcolor(midblue) lwidth(thick)) ///
(qfit share_`X' log_fua_pop [w = fua_pop] if year == 2000, lcolor(blue) lwidth(thick)) ///
, legend(order(4 "1980 or Before" 5 "Early 1990s" 6 "c.~2010") row(1) position(6)) xlabel(11(1)17) xtitle(Log Population Size (Inh.) of Agglomeration) ytitle(Empl. Share (%) of MFGFIRE in the Agglomeration) ytitle(, size(medium) margin(medsmall)) xtitle(, size(medium) margin(medsmall)) graphregion(margin(small) fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ylabel(10(10)60)
graph export evol_LAC_`X'_legend.png, replace width(2620) height(1908)
}
* We modify manually
graph use "LAC.gph"
graph export "Figures\evol_LAC_mfgfire.png", replace width(2620) height(1908)
graph export "Figures\Figure4.png", replace width(2620) height(1908)



